############################################################################
# Mahalanobis Matching for Preprocessing Data
# Becka Brolinson
# Last update: 2/28/2018
############################################################################


## Start from an empty workspace (hidden dot-prefixed objects included)
rm(list = ls(all.names = TRUE))

# Work from the project's R folder, then divert console output to the log file
setwd(dir = "P:/Efficiency Financing/NCREIF/Summer_2017/R")
sink(file = "P:/Efficiency Financing/NCREIF/Summer_2017/R/BalanceLog11042016.txt")

## INITIAL COMMANDS
library(foreign)
library(rgenoud)
library(Matching)
library(readstata13)

## Load the cross-sectional Stata file used for the Cobb/Gwin/Fulton matching
cgf <- read.dta13(
  file = "P:/Efficiency Financing/NCREIF/Summer_2017/Generated_Data/NCREIF_CleanedForMatch1.dta"
)

## Covariates to match on (testing without matching on pre-treatment outcomes).
## The vector holds 15 year-suffixed variables for each year 2000-2015, plus the
## single un-suffixed "yrbuilt", which sits right after "sqft2000".
cov.13 <- local({
  yearly_stems <- c(
    "real_elecprice", "real_gasprice", "Unemployment",
    "HDD", "CDD", "initialcost", "nooffloors", "noofbuildings", "nra", "sqft",
    "lastrenovatedyear", "yrbuiltorlastren", "Covered_E",
    "latitude", "longitude"
  )
  panel_years <- 2000:2015

  # outer() fills column-major, so the result lists every stem for 2000,
  # then every stem for 2001, and so on through 2015.
  yearly_names <- as.vector(outer(yearly_stems, panel_years, paste0))

  # Slot the un-suffixed "yrbuilt" in directly after "sqft2000".
  append(yearly_names, "yrbuilt", after = match("sqft2000", yearly_names))
})

# Copy the outcome and the treatment indicator out of the data first, so these
# two vectors keep their original NAs (the fill below only changes cgf).
# Y is not passed to Match() anywhere in this script.
Y     <- cgf$percentleased2000
treat <- cgf$treat
# Overwrite every NA cell of cgf with a very large sentinel value.
# NOTE(review): presumably this keeps rows with missing covariates in the
# match; it applies to all columns of cgf, not just cov.13 -- confirm intent.
cgf <- replace(cgf, is.na(cgf), 1000000000)

## MAHALANOBIS MATCHING BELOW
#########################
## FULL SAMPLE -> MATCHED **WITH** CALIPERS
# The header and log label used to say "without"/"no caliper", but the call
# below passes a caliper (and the object and output file are named "*_cal"),
# so the labels now describe what is actually run.
print("---treat3, matched, caliper, GWIN---")

# Match treated to control rows on the cov.13 columns, without replacement.
# Weight = 2 selects the Mahalanobis distance in Matching::Match().
# NOTE(review): Match() documents `caliper` as a number in standard-deviation
# units; TRUE presumably acts as 1 -- consider writing the number explicitly.
match.cal <- Match(Tr = treat, X = cgf[cov.13], Weight = 2, replace = FALSE,
                   version = "fast", caliper = TRUE)
summary(match.cal)

# Pull the matched rows: treated rows get weight 1, control rows get the
# weights reported by Match().
tobs <- cgf[match.cal$index.treated, ]
cobs <- cgf[match.cal$index.control, ]
tobs$weight <- rep(1, nrow(tobs))
cobs$weight <- match.cal$weights

# Stack treated on top of controls and export as a ';'-separated file.
# col.names = NA with row.names = TRUE writes a blank header cell above the
# row-name column.
matched.cal <- rbind(tobs, cobs)
write.table(matched.cal, "P:/Efficiency Financing/NCREIF/Summer_2017/R/2018_2_27_maha_match_cal.csv", sep = ";", col.names = NA, row.names = TRUE)


###### 
#Try matching just on time-invariant characteristics 

## Delete all objects from the environment
rm(list = ls(all.names = TRUE))
# Updating file paths

# rm() does not touch output diversions, so close any log sink that is still
# open from an earlier run/section before opening a new one; otherwise the
# sinks stack up and the earlier log file is never released.
while (sink.number() > 0) {
  sink()
}

setwd("/Users/BeckaBrolinson/Dropbox/NCREIF/programs/R")
# The log lives inside the working directory; the original path was missing
# the "/" after "R" and so wrote "RBalanceLog11042016.txt" into programs/.
sink("/Users/BeckaBrolinson/Dropbox/NCREIF/programs/R/BalanceLog11042016.txt")

#Install packages 
#install.packages("foreign")
#install.packages("rgenoud")
#install.packages("Matching")
#install.packages("readstata13")

## INITIAL COMMANDS
library(foreign)
library(rgenoud)
library(Matching)
library(readstata13)

## Load the cross-sectional Stata file used for the Cobb/Gwin/Fulton matching
# (file path updated for this machine)
cgf <- read.dta13(
  file = "/Users/BeckaBrolinson/Dropbox/NCREIF/data/build/NCREIF_CleanedForMatch2.dta"
)

## Covariates to match on: only the "mean_"-prefixed building characteristics
## (testing without matching on pre-treatment outcomes).
cov.13 <- paste0(
  "mean_",
  c("nooffloors", "noofunits", "noofbuildings", "nra", "sqft", "yrbuilt",
    "lastrenovatedyear", "yrbuiltorlastren", "Covered_E", "latitude",
    "longitude")
)

# Copy the outcome and the treatment indicator out of the data first, so these
# two vectors keep their original NAs (the fill below only changes cgf).
# Y is not passed to Match() anywhere in this script.
Y     <- cgf$percentleased2000
treat <- cgf$treat
# Overwrite every NA cell of cgf with a very large sentinel value.
# NOTE(review): presumably this keeps rows with missing covariates in the
# match; it applies to all columns of cgf, not just cov.13 -- confirm intent.
cgf <- replace(cgf, is.na(cgf), 10000000000000)


## FULL SAMPLE -> MATCHED **WITH** CALIPERS
# Log label corrected: it used to say "no caliper" although a caliper is used.
print("---treat3, matched, caliper, GWIN---")

# Match treated to control rows on the cov.13 columns, without replacement.
# Weight = 2 selects the Mahalanobis distance in Matching::Match().
# NOTE(review): Match() documents `caliper` as a number in standard-deviation
# units; TRUE presumably acts as 1 -- consider writing the number explicitly.
match.cal <- Match(Tr = treat, X = cgf[cov.13], Weight = 2, replace = FALSE,
                   version = "fast", caliper = TRUE)

summary(match.cal)

# One row per matched pair: the row index (into cgf) of the treated unit and
# of the control unit it was matched to.
matchedobs <- data.frame(
  index.treated = match.cal[["index.treated"]],
  index.control = match.cal[["index.control"]]
)

# Attach the pair indices to BOTH halves. Previously only tobs received these
# two columns, so tobs and cobs had different column counts and the rbind()
# below stopped with "numbers of columns of arguments do not match".
tobs <- data.frame(cgf[match.cal$index.treated, ], matchedobs)
cobs <- data.frame(cgf[match.cal$index.control, ], matchedobs)

# Treated rows get weight 1, control rows get the weights reported by Match().
tobs$weight <- rep(1, nrow(tobs))
cobs$weight <- match.cal$weights

# Stack treated on top of controls and export as a ';'-separated file.
matched.cal <- rbind(tobs, cobs)

# Written into the working directory set at the top of this section; the old
# "P:/..." target is a Windows drive path that does not exist alongside the
# /Users/... paths used everywhere else here.
# NOTE(review): confirm this is the intended output folder.
write.table(matched.cal, "2018_3_29_maha_match_timeinvar_cal.csv", sep = ";", col.names = NA, row.names = TRUE)

# Close the log: release every output diversion opened by sink() so console
# output returns to normal and the log file is flushed.
while (sink.number() > 0) {
  sink()
}

